clear all
set more off
*set your own directory: cd ""

********************************************************************************
**************** Survey data on violence against women *************************
********************************************************************************

* Start from the 1999 file and stack the 2002 and 2005 files underneath it
use datos1999, clear
foreach survey_file in datos2002 datos2005 {
	append using `survey_file'
}

* The interview dates show that the file labelled 2005 was fielded in 2006,
* so its year code is corrected here
replace year = 2006 if year == 2005

* Domestic violence (from any household member):
* dv is 1 when at least one of the 13 Q19 items is answered 1 or 2, 0 otherwise,
* and missing when all 13 items are coded 5 ("NS/NC").
	global x "Q19A Q19B Q19D Q19F Q19G Q19H Q19I Q19L Q19N Q19O Q19V Q19W Q19Y"
	gen dv=0
	tempvar n_nsnc
	gen `n_nsnc'=0
		foreach item of global x {
		replace dv=1 if inlist(`item',1,2)
		/* count how many of the items carry the "NS/NC" code */
		replace `n_nsnc'=`n_nsnc'+1 if `item'==5
		}
		/* respondents who answered "NS/NC" to all of the 13 questions are set to missing */
	replace dv=. if `n_nsnc'==13
	drop `n_nsnc'
		
* Intimate-partner violence
* Built from the 26 Q20 items listed in global y, for respondents with dv==1:
*   ipv is 1 as soon as any item is coded 1 (presumably the partner as
*   perpetrator; confirm against the questionnaire);
*   ipv is 0 when no item is coded 1 but at least one is coded 2 to 7;
*   ipv stays missing when no item takes a value from 1 to 7, or when dv is missing.
* Respondents with dv==0 are set to 0.
	gen ipv=.
	#delimit;
	global y "Q20A1 Q20A2 Q20B1 Q20B2 Q20D1 Q20D2 Q20F1 Q20F2 Q20G1 Q20G2 Q20H1 Q20H2 Q20I1 Q20I2 Q20L1 Q20L2
	 Q20N1 Q20N2 Q20O1 Q20O2 Q20V1 Q20V2 Q20W1 Q20W2 Q20Y1 Q20Y2" ;
	#delimit cr
		foreach item of global y {
		/* a code of 1 on any item always sets the indicator to 1 */
		replace ipv=1 if dv==1 & `item'==1
		/* codes 2 to 7 only fill in a 0 while the indicator is still undefined */
		replace ipv=0 if dv==1 & ipv==. & inlist(`item',2,3,4,5,6,7)
		}
	replace ipv=0 if dv==0
	
* Different types of domestic violence 
* For every type, the _v indicator is 1 for respondents with dv==1 who answered
* 1 or 2 to at least one Q19 item of that type, 0 otherwise, and missing when dv
* is missing. The _ipv indicator is 1 when the _v indicator is 1 and at least one
* of the matching Q20 items is coded 1, 0 otherwise, and missing when ipv is missing.

		/* structural violence */
		gen str_v = (dv==1 & (inlist(Q19F,1,2) | inlist(Q19H,1,2))) if dv!=.
		/* structural violence from intimate-partner */
		gen str_ipv = (str_v==1 & (Q20F1==1 | Q20F2==1 | Q20H1==1 | Q20H2==1)) if ipv!=.

		/* economic violence */
		gen eco_v = (dv==1 & inlist(Q19B,1,2)) if dv!=.
		/* economic violence from intimate-partner */
		gen eco_ipv = (eco_v==1 & (Q20B1==1 | Q20B2==1)) if ipv!=.

		/* spiritual violence */
		gen spi_v = (dv==1 & inlist(Q19V,1,2)) if dv!=.
		/* spiritual violence from intimate-partner */
		gen spi_ipv = (spi_v==1 & (Q20V1==1 | Q20V2==1)) if ipv!=.

		/* psychological violence */
		gen psy_v = (dv==1 & (inlist(Q19A,1,2) | inlist(Q19D,1,2) | inlist(Q19I,1,2) | inlist(Q19N,1,2) ///
			| inlist(Q19O,1,2) | inlist(Q19W,1,2) | inlist(Q19Y,1,2))) if dv!=.
		/* psychological violence from intimate-partner */
		gen psy_ipv = (psy_v==1 & (Q20A1==1 | Q20A2==1 | Q20D1==1 | Q20D2==1 | Q20I1==1 | Q20I2==1 | Q20N1==1 | Q20N2==1 ///
			| Q20O1==1 | Q20O2==1 | Q20W1==1 | Q20W2==1 | Q20Y1==1 | Q20Y2==1)) if ipv!=.

		/* sexual violence */
		gen sex_v = (dv==1 & inlist(Q19G,1,2)) if dv!=.
		/* sexual violence from intimate-partner */
		gen sex_ipv = (sex_v==1 & (Q20G1==1 | Q20G2==1)) if ipv!=.

		/* physical violence */
		gen phy_v = (dv==1 & inlist(Q19L,1,2)) if dv!=.
		/* physical violence from intimate-partner */
		gen phy_ipv = (phy_v==1 & (Q20L1==1 | Q20L2==1)) if ipv!=.
		
			/* structural, economic, spiritual or psychological violence from intimate-partner:
			   1 if any of the four indicators is 1, 0 otherwise, missing when ipv is missing */
			gen str_eco_spi_psy_ipv = (str_ipv==1 | eco_ipv==1 | spi_ipv==1 | psy_ipv==1) if ipv!=.
					
			/* physical or sexual violence from intimate-partner:
			   1 if either indicator is 1, 0 otherwise, missing when ipv is missing */
			gen phy_sex_ipv = (phy_ipv==1 | sex_ipv==1) if ipv!=.
			
* Continuous measure of IPV
* sum_dv counts how many of the 13 Q19 items are answered 1 or 2 (missing when
* dv is missing). sum_ipv counts how many of the 26 Q20 items are coded 1; it is
* forced to 0 when sum_dv is 0 and to missing when sum_dv is missing.
	local q19_items Q19A Q19B Q19D Q19F Q19G Q19H Q19I Q19L Q19N Q19O Q19V Q19W Q19Y
	local q20_items Q20A1 Q20A2 Q20B1 Q20B2 Q20D1 Q20D2 Q20F1 Q20F2 Q20G1 Q20G2 Q20H1 Q20H2 Q20I1 Q20I2 Q20L1 Q20L2 Q20N1 Q20N2 Q20O1 Q20O2 Q20V1 Q20V2 Q20W1 Q20W2 Q20Y1 Q20Y2

	egen sum_dv=anycount(`q19_items'), values(1 2)
	replace sum_dv=. if dv==.

	egen sum_ipv=anycount(`q20_items'), values(1)
	replace sum_ipv=cond(sum_dv==., ., 0) if sum_dv==0 | sum_dv==.

* Socio-demographic variables
	rename PROVIN provin
	rename TAMANO city
	rename Q1 age
	rename Q2B civil_status
		/* code 6 of civil_status is treated as missing (18 cases according to the original note) */
		mvdecode civil_status, mv(6)
		
	* Occupational status: 1 working, 2 unemployed, 3 inactive or pension recipient.
	* Q11 has nine codes in the 2006 file and five codes in the 1999 and 2002 files.
	gen occup_status=.
	replace occup_status=1 if year==2006 & inlist(Q11,1,2,3,4)
	replace occup_status=2 if year==2006 & inlist(Q11,5,6)
	replace occup_status=3 if year==2006 & inlist(Q11,7,8,9)
	replace occup_status=1 if inlist(year,1999,2002) & Q11==1
	replace occup_status=2 if inlist(year,1999,2002) & Q11==2
	replace occup_status=3 if inlist(year,1999,2002) & inlist(Q11,3,4,5)
	
	* refe copies Q10, with code 2 turned into 0 and code 3 turned into missing
	gen refe=cond(Q10==2, 0, Q10) if Q10!=3
	
	* children: 1 if Q7 is 1, 0 if Q7 is 2, missing for any other value
	gen children=(Q7==1) if inlist(Q7,1,2)
	
	* partner: 1 when Q3 takes any code from 1 to 8, missing when Q3 is 99,
	* and 0 for every other value of Q3 (including a missing Q3).
	* A single membership test replaces a forvalues loop over 1/8 that also
	* reassigned its own counter inside the body; all eight codes are covered
	* here regardless of how that reassignment was treated.
	gen partner=0
	replace partner=1 if inlist(Q3,1,2,3,4,5,6,7,8)
	replace partner=. if Q3==99
	
	* catholic: 1 if Q25 is 1 or 2, 0 for other codes, missing when Q25 is 21 or missing
	gen catholic=inlist(Q25,1,2) if Q25!=21 & Q25!=.
	
	* n_household: Q9 when it is at most 10; larger or missing values are left missing
	gen n_household=Q9 if Q9<=10 
	
	/* education: the 2006 file uses a different coding of Q6 than the 1999 and
	2002 files, and the value labels left after the 'append' only describe the
	1999 and 2002 coding */	
	* primary: Q6 is 1 in any year, or 2-3 in 2006
	gen prim=(Q6==1) | (inlist(Q6,2,3) & year==2006)

	* secondary: Q6 is 2-3 in 1999 or 2002, or 4-7 in 2006
	gen secon=(inlist(Q6,2,3) & inlist(year,1999,2002)) | (inlist(Q6,4,5,6,7) & year==2006)

	* university: Q6 is 4 in 1999 or 2002, or 8-9 in 2006
	gen univ=(Q6==4 & inlist(year,1999,2002)) | (inlist(Q6,8,9) & year==2006)

	* educ: 1 primary, 2 secondary, 3 university; missing when none of the three dummies is 1
	gen educ=cond(univ==1, 3, cond(secon==1, 2, cond(prim==1, 1, .)))
	
	/* partner's education, built from Q17 with the same year-specific code groups used for Q6 */	
	* primary: Q17 is 1 in any year, or 2-3 in 2006
	gen prim_partner=(Q17==1) | (inlist(Q17,2,3) & year==2006)

	* secondary: Q17 is 2-3 in 1999 or 2002, or 4-7 in 2006
	gen secon_partner=(inlist(Q17,2,3) & inlist(year,1999,2002)) | (inlist(Q17,4,5,6,7) & year==2006)

	* university: Q17 is 4 in 1999 or 2002, or 8-9 in 2006
	gen univ_partner=(Q17==4 & inlist(year,1999,2002)) | (inlist(Q17,8,9) & year==2006)

	* educ_par: 1 primary, 2 secondary, 3 university; missing when none of the three dummies is 1
	gen educ_par=cond(univ_partner==1, 3, cond(secon_partner==1, 2, cond(prim_partner==1, 1, .)))
	
	/* additional category 0, in order to include observations that have no partner */
	gen educ_par2=cond(partner==0, 0, educ_par)
	
/*drop Ceuta and Melilla (province codes 51 and 52)*/
drop if inlist(provin,51,52)

/* political decentralization instrument: 1 for autono codes 2, 4, 9, 10 and 15
   and for province 48, 0 otherwise */
	gen polit_dec=inlist(autono,2,4,9,10,15) | provin==48
	/* Navarre, Aragon, Catalonia, Valencia, Balearic Islands and Vizcaya */

/* merge with the province-by-year covariates */
merge m:1 provin year using auxiliary_data, nogenerate

/* Soria (province 42): we assign the value for the whole AC region. This value
   lies between the 2 neighboring provinces within the AC: Segovia 8.2 and Burgos 7.8 */
replace urban1787=8.1 if provin==42 
/* Lugo, Orense and Pontevedra (provinces 27, 32 and 36): the average for the
   Galicia AC is 2.2. Same source as other data (see text). */
replace urban1787=2.2 if inlist(provin,27,32,36)
	
/* merge with the 1860 census data that contains information about the family structure */
merge m:1 provin using 1860_census, nogenerate

	/* population density in 1860; area is only needed for this ratio and is dropped afterwards */
	gen pop_dens1860=pop1860/area
	drop area

* Attach descriptive labels to three variables.
* NOTE(review): stem_f is not created in this script; presumably it comes from
* one of the merged files - confirm.
label var ipv "Intimate-partner violence"
label var stem_f "Stem family"
label var polit_dec "Political decentralization"

* Keep only the variables listed here; every other variable is dropped.
keep ipv sum_ipv age educ educ_par educ_par2 occup_status children civil_status city year refe prim secon univ ///
catholic gdp_pc unemp_rate ruggedness pop_dens1787 pop_dens1860 pop_dens urban1787 urban1860 social_capital ///
rainfall temp temp_range frost polit_dec n_household stem_f* provin autono phy_sex_ipv str_eco_spi_psy_ipv

* This regression is run quietly and its output is not used. Its purpose is to
* set e(sample), which flags the observations that entered the estimation,
* i.e. those with no missing value in the outcome or in any listed regressor.
qui xi: regress ipv i.age i.educ i.educ_par2 i.occup_status children refe i.civil_status i.city i.year gdp_pc unemp_rate ///
	catholic social_capital n_household ///
	pop_dens1787 pop_dens1860 pop_dens urban1787 urban1860 ///
	ruggedness temp rainfall temp_range frost stem_f, cluster(provin)
	
* Restrict the data to that estimation sample.
keep if e(sample)

* Write the analysis file, overwriting any existing copy.
save data_main_analysis, replace

********************************************************************************
*************************** Ethnographic Atlas *********************************
********************************************************************************

use EthnographicAtlasWCRevisedByWorldCultures, clear

* dependent variable
/* sex differences in agriculture, on a 1-5 scale. 1: only males, 5: females only.
   Codes 1-3 of v54 are kept as they are and codes 4-6 are shifted down by one, so
   that v54 codes 3 and 4 share the value 3: following Alesina et al. (2013), I group
   'differentiated but equal participation' and 'equal participation, not marked
   differentiation'. Any other value of v54 is left missing. */
gen female_agric=cond(v54<=3, v54, v54-1) if inlist(v54,1,2,3,4,5,6)

*inheritance distribution for real property (land) 
/* 0 if v75 is 1 (equal or relatively equal); 1 if v75 is 2, 3 or 4 (exclusively or
   predominantly to one, ultimogeniture or primogeniture); missing otherwise */
gen impartible=inlist(v75,2,3,4) if inlist(v75,1,2,3,4)

* control variables
gen animal_husbandry=v4
gen agriculture=v5

/* econ_level copies v30, with code 0 treated as missing */
gen econ_level=v30 if v30!=0

/* polit_complex shifts v33 codes 1-5 down to 0-4; other values are left missing */
gen polit_complex=v33-1 if inlist(v33,1,2,3,4,5)

/* plough: 1 if v39 is 2 or 3, 0 if v39 is 1, missing otherwise */
gen plough=inlist(v39,2,3) if inlist(v39,1,2,3)

save ethnoatlas_analysis, replace


********************************************************************************
*************************** World Values Survey ********************************
********************************************************************************

use wvs_spain, clear

* Outcome variables: attitudes toward women
/* When jobs are scarce, men should have more right to a job than women.
   job_men=1 if c001 is 2 (disagree), 0 if c001 is 1 (agree); missing otherwise */
gen job_men=(c001==2) if inlist(c001,1,2)

/* Both the husband and wife should contribute to household income.
   1 if d058 is 1 or 2, 0 if d058 is 3 or 4; missing otherwise */
gen both_contribute=inlist(d058,1,2) if inlist(d058,1,2,3,4)

/* On the whole, men make better political leaders than women do.
   men_politics=1 if d059 is 3 or 4, 0 if d059 is 1 or 2; missing otherwise.
   NOTE(review): if d059 uses the same 1-4 scale as d063 (1-2 = agree strongly or
   agree, 3-4 = disagree or disagree strongly), this codes 1 for DISAGREEMENT,
   the same direction as job_men. The comment that previously accompanied this
   variable stated the opposite direction - confirm against the WVS codebook. */
gen men_politics=inlist(d059,3,4) if inlist(d059,1,2,3,4)

/* Having a job is the best way for a woman to be an independent person.
   1 for agree strongly and agree (d063 is 1 or 2), 0 for disagree strongly and
   disagree (d063 is 3 or 4); missing otherwise */ 
gen job_indep=inlist(d063,1,2) if inlist(d063,1,2,3,4)

* Control variables
gen education=x025r /*1=lower, 2=middle, 3=upper*/

/* marital: 1 = living together or married (x007 is 1 or 2),
            2 = divorced, separated or widowed (x007 is 3, 4 or 5),
            3 = single, never married (x007 is 6); missing otherwise */
gen marital=cond(inlist(x007,1,2), 1, cond(inlist(x007,3,4,5), 2, cond(x007==6, 3, .)))

/* job_status: 1 = employed, full, part-time or self (x028 is 1, 2 or 3),
               2 = retired, housewife or student (x028 is 4, 5 or 6),
               3 = unemployed (x028 is 7); missing otherwise */
gen job_status=cond(inlist(x028,1,2,3), 1, cond(inlist(x028,4,5,6), 2, cond(x028==7, 3, .)))

gen age=x003

/* female: 1 if x001 is 2, 0 if x001 is 1; missing otherwise */
gen female=(x001==2) if inlist(x001,1,2)

* Placebo outcomes:
/* trust: 1 if a165 is 1 (most people can be trusted), 0 if a165 is 2
   (can't be too careful); missing otherwise */
gen trust=(a165==1) if inlist(a165,1,2)

/* The three items below are on a 1-10 scale and are split at 5:
   values above 5 and up to 10 give 1, values from 1 to 5 give 0,
   and anything outside 1-10 is left missing. */

/* life satisfaction: 1 if satisfied (6 to completely), 0 if dissatisfied (1 to 5) */
gen satis=(a170>5) if a170>=1 & a170<=10

/* f118: 1 if they find it justifiable (6 to completely), 0 if not justifiable (1 to 5) */
gen homo=(f118>5) if f118>=1 & f118<=10

/* f122: 1 if they find it justifiable (6 to completely), 0 if not justifiable (1 to 5) */
gen eutha=(f122>5) if f122>=1 & f122<=10
	 
/* I add the stem family variable aggregated at the ccaa level */
* x048 is renamed to ccaa because ccaa is the key used in both merges below.
rename x048 ccaa
sort ccaa
* Many survey rows per ccaa are matched to one row of the using file.
* NOTE(review): no keep() option is given and the merge indicator is discarded
* (nogen), so unmatched rows from either file stay in the data unflagged.
merge m:1 ccaa using 1860_census_ccaa, nogen

/* I add the GDP per capita. Data from Spanish Institute for Statistics (INE) for the years 1990, 1995, 2000 and 2007 */
* year is copied from s020 so that it can serve as the second merge key.
gen year=s020
sort ccaa year
merge m:1 ccaa year using wvs_auxiliar, nogen

* Write the analysis file, overwriting any existing copy.
save wvs_analysis, replace

